***Replication File for "Forecasting Migration Movements using Prediction Markets" as of December 21, 2022***

***PARTICIPATION***
import delimited "/Users/admin/Dropbox/PM_alberto_oliver/projects/migration2020/migration_2020_anonymized.csv", clear 

*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"

*daily
gen clockstring = time
gen double daily = date(clockstring, "YMDhm")
format daily %td
encode userlogin, gen(user_nr)
encode contract_id, gen(contract_nr)
collapse (count) trade_amount,  by(daily user_nr contract_nr)
*collapse (sum) trade_amount (count) contract_nr, by(user_nr daily)
collapse (sum) trade_amount (count) user_nr, by(daily)
drop if daily > daily("18may2020","YMDhm")
*drop opening day
drop in 1
*fill in zeros
tsset daily
tsfill 
replace trade_amount=0 if trade_amount==.
*replace contract_nr=0 if contract_nr==.
replace user_nr=0 if user_nr==.

*FIGURE 1
*set scheme
set scheme tab1
set scheme white_tableau
twoway (bar trade_amount daily, yaxis(1)) (line user_nr daily, yaxis(2))


***INTERPOLATE THE DATA***

***GERMANY***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="J92WZK5uABziwrosz"
rename prices contract4
rename v19 contract5
rename v20 contract1
rename v21 contract2
rename v22 contract3

replace contract1 = subinstr(contract1, "hDe2ejSkM35AMyXAL: ", "", .)
replace contract2 = subinstr(contract2, "pAdETm3Sg772HMWoT: ", "", .)
replace contract3 = subinstr(contract3, "pJecSpwSXpz4ZxBP2: ", "", .)
replace contract4 = subinstr(contract4, "4BTFqGahpWbk9po9u: ", "", .)
replace contract5 = subinstr(contract5, "JDHg6foQoeNRbJz9L: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 800, 1k, 1.2k, 1.4k & 2.2k (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract800
ren contract2_ contract1000
ren contract3_ contract1200
ren contract4_ contract1400
ren contract5_ contract2200

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...
drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

*result
gen result2020=994.819
gen result2019=1345.943
*tsforecasts (see below)
gen drift=1416.137
gen _smooth=1408.482
gen _arima=1430.052

gen country="Germany"
gen issue="immigration"

save "Germany_immigration2020", replace


****************************************************************
***SPAIN***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="ZRuSzJPPTYXtxP4zT"
rename prices contract5
rename v19 contract4
rename v20 contract1
rename v21 contract3
rename v22 contract2

replace contract1 = subinstr(contract1, "ff4t3QBp7nQqJtCsQ: ", "", .)
replace contract2 = subinstr(contract2, "vkf85qJoQXzSkuqqo: ", "", .)
replace contract3 = subinstr(contract3, "h29SFxqtZxGDfpccP: ", "", .)
replace contract4 = subinstr(contract4, "FJzxvTHe6FcAZNfoB: ", "", .)
replace contract5 = subinstr(contract5, "943miiXyzFnr456T8: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 400, 500, 600, 700 & 1100 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract400
ren contract2_ contract500
ren contract3_ contract600
ren contract4_ contract700
ren contract5_ contract1100

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

***Comparison with result
*https://www.ine.es/consul/serie.do?d=true&s=EM51131 : 219,483+246,238=465,721
gen result2020=415.150
gen result2019=666.022
*tsforecasts (see below)
gen drift=674.9902
gen _smooth=533.6156
gen _arima=763.3158

gen country="Spain"
gen issue="immigration"

save "Spain_immigration2020", replace


****************************************************************
***SWITZERLAND***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="SPMe8k7ZsK6pcXEQe"
rename prices contract5
rename v19 contract2
rename v20 contract1
rename v21 contract4
rename v22 contract3

replace contract1 = subinstr(contract1, "bA3C4RbGp4qyxNd8r: ", "", .)
replace contract2 = subinstr(contract2, "a8KyZ6vGhmwWDy49w: ", "", .)
replace contract3 = subinstr(contract3, "wcSktGY9a9osHvkER: ", "", .)
replace contract4 = subinstr(contract4, "sCuazYT8DK6Whs4vT: ", "", .)
replace contract5 = subinstr(contract5, "GHwEzPxo3P5cwvMte: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 60, 80, 100, 120 & 180 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract60
ren contract2_ contract80
ren contract3_ contract100
ren contract4_ contract120
ren contract5_ contract180

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

***Comparison with result "Effektive Zuwanderung"
*https://www.sem.admin.ch/sem/de/home/publiservice/statistik/auslaenderstatistik/monitor.html
gen result2020=106.231
gen result2019=107.387
*tsforecasts (see below)
gen drift=106.2599
gen _smooth=107.8192
gen _arima=107.1514

gen country="Switzerland"
gen issue="immigration"

save "Switzerland_immigration2020", replace


****************************************************************
***GERMANY ASYLUM***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="ijQhBTyzYf4mB3JSC"
rename prices contract1
rename v19 contract4
rename v20 contract2
rename v21 contract5
rename v22 contract3

replace contract1 = subinstr(contract1, "7eeqQd6FTDZbyKzzQ: ", "", .)
replace contract2 = subinstr(contract2, "Xe8Pv9SeepPD7idWC: ", "", .)
replace contract3 = subinstr(contract3, "obhyDsqW79WkyaGaf: ", "", .)
replace contract4 = subinstr(contract4, "C9vuZYEMtfiza8pco: ", "", .)
replace contract5 = subinstr(contract5, "jf7MyRXm4Aaro4Ab8: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 100, 125, 150, 175 & 275 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract100
ren contract2_ contract125
ren contract3_ contract150
ren contract4_ contract175
ren contract5_ contract275

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/databrowser/view/tps00191/default/table?lang=en
gen result2019=142.450
gen result2020=103
*tsforecasts (see below)
gen drift=153.695
gen _smooth=218.8277
gen _arima=151.5971

gen country="Germany"
gen issue="asylum"

save "Germany_asylum2020", replace


****************************************************************
***SPAIN ASYLUM***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="Q2nyGXgeChzwLJXff"
rename prices contract5
rename v19 contract3
rename v20 contract4
rename v21 contract1
rename v22 contract2

replace contract1 = subinstr(contract1, "tYDf8twSdG2wNNAr2: ", "", .)
replace contract2 = subinstr(contract2, "vPmkP2xmwS6mjMZ7Q: ", "", .)
replace contract3 = subinstr(contract3, "EoFwjoertA63tbLFu: ", "", .)
replace contract4 = subinstr(contract4, "oSmjB4g6jiugwYSSt: ", "", .)
replace contract5 = subinstr(contract5, "9WRrdWGE5nsG4DmXW: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 60, 80, 100, 120 & 180 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract60
ren contract2_ contract80
ren contract3_ contract100
ren contract4_ contract120
ren contract5_ contract180

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...
drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/databrowser/view/tps00191/default/table?lang=engen result2019=143
gen result2020=86.4
gen result2019=115.2
*tsforecasts (see below)
gen drift=127.68889
gen _smooth=65.912
gen _arima=164.7912

gen country="Spain"
gen issue="asylum"

save "Spain_asylum2020", replace


****************************************************************
***SWITZERLAND ASYLUM***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="qs8eLMhZM39TvqZ5g"
rename prices contract5
rename v19 contract2
rename v20 contract4
rename v21 contract1
rename v22 contract3

replace contract1 = subinstr(contract1, "q3uRosdF6cCzkASbR: ", "", .)
replace contract2 = subinstr(contract2, "CktadqZKd62FQfx6J: ", "", .)
replace contract3 = subinstr(contract3, "svM9KKL3LtzG7aJiR: ", "", .)
replace contract4 = subinstr(contract4, "i24RTFkwLhaDkhBxQ: ", "", .)
replace contract5 = subinstr(contract5, "2CpNoiFN4kFqFidfi: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time


*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 10, 12.5, 15, 17.5 & 27.5 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract10000
ren contract2_ contract12500
ren contract3_ contract15000
ren contract4_ contract17500
ren contract5_ contract27500

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...

drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

***Comparison with result
*https://www.sem.admin.ch/sem/de/home/publiservice/statistik/asylstatistik/archiv/2020.html
gen result2020=11041
gen result2019=14269
*tsforecasts (see below)
gen drift=14124.78
gen _smooth=18183.08
gen _arima=14130.33

gen country="Switzerland"
gen issue="asylum"

save "Switzerland_asylum2020", replace


****************************************************************
***UK ASYLUM***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="pSTKTEsfpvWf43CQa"
rename prices contract3
rename v19 contract2
rename v20 contract5
rename v21 contract1
rename v22 contract4

replace contract1 = subinstr(contract1, "bMur5N7fsbcBky8hm: ", "", .)
replace contract2 = subinstr(contract2, "EgtbWKWvdZT7kyYeY: ", "", .)
replace contract3 = subinstr(contract3, "AE8MzEQ42iwrbqEAp: ", "", .)
replace contract4 = subinstr(contract4, "jjQ2gaPXcMsZxLDyQ: ", "", .)
replace contract5 = subinstr(contract5, "T9Lw6mHndTHfFsRiS: ", "", .)

destring contract1, replace
destring contract2, replace
destring contract3, replace
destring contract4, replace
destring contract5, replace

gen clockstring = time

*collapse at the level of the minutes
collapse (first) contract1 contract2 contract3 contract4 contract5,  by(clockstring)

***generate contracts 0, 30, 35, 40, 45 & 75 (symmetric max.!)
g contract0=0
egen contract2_=rowtotal(contract1 - contract2)
egen contract3_=rowtotal(contract1 - contract3)
egen contract4_=rowtotal(contract1 - contract4)
egen contract5_=rowtotal(contract1 - contract5)

drop contract2 contract3 contract4 contract5
ren contract1 contract30
ren contract2_ contract35
ren contract3_ contract40
ren contract4_ contract45
ren contract5_ contract75

*reshape the values so 
list
reshape long contract, i(clockstring) j(probability)
set more off

*round two digits
g contract_r = contract*1000
g contract_r2= round(contract_r, 1)

drop contract contract_r
ren contract_r2 contract

*reshape again but using probabilities
 reshape wide probability, i(clockstring) j(contract) 
set more off

 *fill will the probabilities we are looking for
g probability10000=.
g probability50000=.
g probability90000=.

*reshape so we can do the mipolation
 reshape long probability, i(clockstring) j(contract) 
 set more off
 
*set back the digits
g contract_r= contract/1000
drop contract
ren contract_r contract 

*mipolation, go, go
by clockstring: mipolate probability contract, pchip generate(i_probability)
drop probability
ren i_probability probability

*let's keep only the interesting information...
drop if contract <10
drop if contract>10 & contract<50
drop if contract>50 & contract<90
drop if contract>90

*...so we can then reshape properly (last time I reshape, promise)
 reshape wide probability, i(clockstring) j(contract) 

*reverse the scale if referendum
gen p10=probability90
gen p50=probability50
gen p90=probability10

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) p10 p50 p90, by(daily)

***Comparison with result
*Table in dropbox folder
gen result2019=35.737
gen result2020=29
*tsforecasts (see below)
gen drift=37.71711
gen _smooth=31.35188
gen _arima=36.34441

gen country="UK"
gen issue="asylum"

save "UK_asylum2020", replace


****************************************************************
***Germany ASYLUM Groups***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="SMR5DNa8MGyPPRB8N"
rename prices Irak
rename v19 Turkey
rename v20 Iran
rename v21 Syria
rename v22 Other
rename v23 Afghanistan

replace Syria = subinstr(Syria, "PW4RYTo2ywjiRTXJ9: ", "", .)
replace Irak = subinstr(Irak, "3C5pm5xDhEy6AoA8q: ", "", .)
replace Afghanistan = subinstr(Afghanistan, "pruas9QMe7L6NecDd: ", "", .)
replace Turkey = subinstr(Turkey, "9kaz6WaKbBAxg5toj: ", "", .)
replace Iran = subinstr(Iran, "MyTRab7sSJxEhREj8: ", "", .)
replace Other = subinstr(Other, "gpBBt84xcSBH6R6d4: ", "", .)

destring Syria, replace
destring Irak, replace
destring Afghanistan, replace
destring Turkey, replace
destring Iran, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Syria Irak Afghanistan Turkey Iran Other, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Syria_abs=36435
gen Irak_abs=9845
gen Afghanistan_abs=9900
gen Turkey_abs=5780
gen Iran_abs=3120

gen country="Germany"
gen issue="asgroups"

save "Germany_asgroups2020", replace


****************************************************************
***Spain ASYLUM Groups***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="PgsodWj3FRje9YnGG"
rename prices Honduras
rename v19 Nicaragua
rename v20 Venezuela
rename v21 ElSalvador
rename v22 Other
rename v23 Colombia

replace Venezuela = subinstr(Venezuela, "JWcxKGRqFRasoxcSH: ", "", .)
replace Colombia = subinstr(Colombia, "vv637NZH27pQFktpD: ", "", .)
replace Honduras = subinstr(Honduras, "DMPbDr2eXreKwrA97: ", "", .)
replace Nicaragua = subinstr(Nicaragua, "DcWdjF3FodCgdeyqF: ", "", .)
replace ElSalvador = subinstr(ElSalvador, "MNXTRjTAyHF4baZaq: ", "", .)
replace Other = subinstr(Other, "Qv9A3k3gaKMbfYPgk: ", "", .)

destring Venezuela, replace
destring Colombia, replace
destring Honduras, replace
destring Nicaragua, replace
destring ElSalvador, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Venezuela Colombia Honduras Nicaragua ElSalvador Other, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Venezuela_abs=28065
gen Colombia_abs=27180
gen Honduras_abs=5465
gen Nicaragua_abs=3680
gen ElSalvador_abs=2475

gen country="Spain"
gen issue="asgroups"

save "Spain_asgroups2020", replace


****************************************************************
***Switzerland ASYLUM Groups***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="5MikQ957MPj5Xok48"
rename prices Irak
rename v19 Afghanistan
rename v20 Algeria
rename v21 Syria
rename v22 Other
rename v23 Eritrea

replace Syria = subinstr(Syria, "mBXmEWgaX3rgYCQMS: ", "", .)
replace Irak = subinstr(Irak, "4SC6e3fT2aHQTEC9p: ", "", .)
replace Afghanistan = subinstr(Afghanistan, "bt2TPvrGZutrQrhnt: ", "", .)
replace Eritrea = subinstr(Eritrea, "w5roE8HE2juzEgACB: ", "", .)
replace Algeria = subinstr(Algeria, "g72cJ8Ga9Ga9vWM8p: ", "", .)
replace Other = subinstr(Other, "p2AZRZPWQWjNDSLXz: ", "", .)

destring Syria, replace
destring Irak, replace
destring Afghanistan, replace
destring Eritrea, replace
destring Algeria, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Syria Irak Afghanistan Eritrea Algeria Other, by(daily)

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Syria_abs=755
gen Irak_abs=270
gen Afghanistan_abs=1630
gen Eritrea_abs=1635
gen Algeria_abs=935

gen country="Switzerland"
gen issue="asgroups"

save "Switzerland_asgroups2020", replace


****************************************************************
***UK ASYLUM Groups***
import delimited "migration_2020_anonymized.csv", encoding(ISO-8859-1)clear
*insert data into excel comma-seperated, delete the "
drop if market_id!="xmwm5w3wcEFKuhhWb"
drop if set_id!="9KeMqZhmfb4vsjaBa"
rename prices Other
rename v19 Pakistan
rename v20 Iran
rename v21 Albania
rename v22 Eritrea
rename v23 Irak

replace Pakistan = subinstr(Pakistan, "N4q88vnf6ha5pQNeJ: ", "", .)
replace Irak = subinstr(Irak, "uhTyAPqsYryCoS89o: ", "", .)
replace Albania = subinstr(Albania, "ij9FsFiDCyiboqauT: ", "", .)
replace Eritrea = subinstr(Eritrea, "ndsX2oPnohNsqqBzp: ", "", .)
replace Iran = subinstr(Iran, "i6MR9oqacDhuywFWh: ", "", .)
replace Other = subinstr(Other, "JfQ3FaHeDrekEEnmW: ", "", .)

destring Pakistan, replace
destring Irak, replace
destring Albania, replace
destring Eritrea, replace
destring Iran, replace
destring Other, replace

gen clockstring = time

*and to the daily data
gen double daily = date(clockstring, "YMDhm")
format daily %td
collapse (mean) Pakistan Irak Albania Eritrea Iran Other, by(daily)
tsset daily
tsline Pakistan Irak Albania Eritrea Iran Other

***Comparison with result
*https://ec.europa.eu/eurostat/statistics-explained/index.php?title=File:Table_1_Five_main_citizenships_of_first-time_asylum_applicants_(non-EU_citizens),_2020_(number,_rounded_figures)_v2.png
gen Pakistan_abs=1205
gen Irak_abs=2304
gen Albania_abs=2784
gen Eritrea_abs=2496
gen Iran_abs=3847

gen country="UK"
gen issue="asgroups"

save "UK_asgroups2020", replace


**********TIME SERIES FORECASTS FOR ASYLUM APPLICATIONS**********
import excel "/Users/admin/Dropbox/Sampling_2020JanuaryFebruary/immigration_ts.xlsx", sheet("Tabelle1") firstrow clear

drop if year>2019

tsset year
tsappend, add(1)

*TIME SERIES METHOD?
twoway connected imm_ch year
twoway connected imm_de year
twoway connected imm_es year

*SIMPLE FORECASTS
*gen naive
gen naive_imm_ch=l.imm_ch
gen naive_imm_de=l.imm_de
gen naive_imm_es=l.imm_es
*drift(?)=yt-1+c, where c=(yT−yt-1)/(T−1)
gen c=(106231-118629)/(2019-2008)
gen drift_imm_ch=l.imm_ch+c
gen d=(1345943-573815)/(2019-2008)
gen drift_imm_de=l.imm_de+d
gen e=(666022-567372)/(2019-2008)
gen drift_imm_es=l.imm_es+e
*simple exponential smoothing
tssmooth exponential smooth_imm_ch=imm_ch, parms(.4) forecast(1)
tssmooth exponential smooth_imm_de=imm_de, parms(.4) forecast(1)
tssmooth exponential smooth_imm_es=imm_es, parms(.4) forecast(1)

*CREATE AN ARIMA MODEL
arima imm_ch, arima(1,1,1)
predict arima_imm_ch, dynamic(2020) y
label variable arima_imm_ch arima_imm_ch
arima imm_de, arima(1,1,1)
predict arima_imm_de, dynamic(2020) y
label variable arima_imm_de arima_imm_de
arima imm_es, arima(1,1,1)
predict arima_imm_es, dynamic(2020) y
label variable arima_imm_es arima_imm_es


**********TIME SERIES FORECASTS FOR ASYLUM APPLICATIONS**********
import excel "/Users/admin/Dropbox/Sampling_2020JanuaryFebruary/asylum_applications_ts.xlsx", sheet("Tabelle1") firstrow clear

drop if year>2019

tsset year
tsappend, add(1)

*SIMPLE FORECASTS
*gen naive
gen naive_asyl_ch=l.Switzerland
gen naive_asyl_de=l.Germany
gen naive_asyl_es=l.Spain
gen naive_asyl_uk=l.UK
*drift(?)=yt-1+c, where c=(yT−yt-1)/(T−1)
gen c=(14269-15567)/(2019-2010)
gen drift_asyl_ch=l.Switzerland+c
gen d=(142450-41245)/(2019-2010)
gen drift_asyl_de=l.Germany+d
gen e=(115175-2550)/(2019-2010)
gen drift_asyl_es=l.Spain+e
gen f=(35737-17916)/(2019-2010)
gen drift_asyl_uk=l.UK+f
*simple exponential smoothing
tssmooth exponential smooth_asyl_ch=Switzerland, parms(.4) forecast(1)
tssmooth exponential smooth_asyl_de=Germany, parms(.4) forecast(1)
tssmooth exponential smooth_asyl_es=Spain, parms(.4) forecast(1)
tssmooth exponential smooth_asyl_uk=UK, parms(.4) forecast(1)

*CREATE AN ARIMA MODEL
arima Switzerland, arima(1,1,1)
predict arima_asyl_ch, dynamic(2020) y
label variable arima_asyl_ch arima_asyl_ch
arima Germany, arima(1,1,1)
predict arima_asyl_de, dynamic(2020) y
label variable arima_asyl_de arima_asyl_de
arima Spain, arima(1,1,1)
predict arima_asyl_es, dynamic(2020) y
label variable arima_asyl_es arima_asyl_es
arima UK, arima(1,1,1)
predict arima_asyl_uk, dynamic(2020) y
label variable arima_asyl_uk arima_asyl_uk


***********Merge the datasets
clear
cd "/Users/admin/Dropbox/PM_alberto_oliver/projects/migration2020"
use "Germany_immigration2020.dta", clear

append using "Spain_immigration2020.dta"
append using "Switzerland_immigration2020.dta"
append using "Germany_asylum2020.dta"
append using "Spain_asylum2020.dta"
append using "Switzerland_asylum2020.dta"
append using "UK_asylum2020.dta"

save "pm_tseries_migration2020.dta", replace

*Recodings

encode issue, gen(panel)
set obs `=_N+1' 
replace panel=1 if panel==.
set obs `=_N+1' 
replace panel=2 if panel==.
set obs `=_N+1' 
replace panel=3 if panel==.
set obs `=_N+1' 
replace panel=4 if panel==.
set obs `=_N+1' 
replace panel=5 if panel==.
set obs `=_N+1' 
replace panel=6 if panel==.
set obs `=_N+1' 
replace panel=7 if panel==.
set obs `=_N+1' 
replace panel=8 if panel==.
set obs `=_N+1' 
replace panel=9 if panel==.
set obs `=_N+1' 
replace panel=10 if panel==.
set obs `=_N+1' 
replace panel=11 if panel==.
set obs `=_N+1' 
replace panel=12 if panel==.
set obs `=_N+1' 
replace panel=13 if panel==.
set obs `=_N+1' 
replace panel=14 if panel==.
set obs `=_N+1' 
replace panel=15 if panel==.
set obs `=_N+1' 
replace panel=16 if panel==.
set obs `=_N+1' 
replace panel=17 if panel==.
set obs `=_N+1' 
replace panel=18 if panel==.
set obs `=_N+1' 
replace panel=19 if panel==.
set obs `=_N+1' 
replace panel=20 if panel==.
set obs `=_N+1' 
replace panel=21 if panel==.
set obs `=_N+1' 
replace panel=22 if panel==.

drop if daily<22067
drop if daily>22096

collapse (mean) p10 p50 p90 result2019 result2020 drift _smooth _arima, by(issue country)

***TABLE 1
gen nr = _n
tsset nr
fcstats result2020 p50
fcstats result2020 result2019 
fcstats result2020 drift 
fcstats result2020 _smooth
fcstats result2020 _arima

*putting CH on same scale: asylum
replace p10=p10/100 if country=="Switzerland" & issue=="asylum"
replace p50=p50/100 if country=="Switzerland" & issue=="asylum"
replace p90=p90/100 if country=="Switzerland" & issue=="asylum"
replace result2019=result2019/100 if country=="Switzerland" & issue=="asylum"
replace result2020=result2020/100 if country=="Switzerland" & issue=="asylum"
replace drift=drift/100 if country=="Switzerland" & issue=="asylum"
replace _smooth=_smooth/100 if country=="Switzerland" & issue=="asylum"
replace _arima=_arima/100 if country=="Switzerland" & issue=="asylum"

*putting CH on same scale: immigration
replace p10=p10*10 if country=="Switzerland" & issue=="immigration"
replace p50=p50*10 if country=="Switzerland" & issue=="immigration"
replace p90=p90*10 if country=="Switzerland" & issue=="immigration"
replace result2019=result2019*10 if country=="Switzerland" & issue=="immigration"
replace result2020=result2020*10 if country=="Switzerland" & issue=="immigration"
replace drift=drift*10 if country=="Switzerland" & issue=="immigration"
replace _smooth=_smooth*10 if country=="Switzerland" & issue=="immigration"
replace _arima=_arima*10 if country=="Switzerland" & issue=="immigration"

label variable p10 "upper 80% bound"
label variable p90 "lower 80% bound"
label variable p50 "forecast"
label variable result2019 "Result 2019"
label variable result2020 "Result 2020"

*from wide to long
egen id=concat(country issue)
rename result2020 f0
rename p10 f1
rename p50 f2
rename p90 f3
rename result2019 f4
rename drift f5
rename _smooth f6
rename _arima f7
drop nr
reshape long f, i(id) j(nr)
gen type=.
replace type=1 if nr==0
replace type=2 if nr==1 | nr==2 | nr==3
replace type=3 if nr==4 | nr==5 | nr==6 | nr==7
save "pm_tseries_migration2020_part.dta", replace
*Asylum
drop if issue!="asylum"
drop id
egen id=concat(country type)
reshape wide f type country, i(id) j(nr) 
gen id2=country0 if country0!=""
replace id2=country1 if country1!=""
replace id2=country7 if country7!=""
*reshape long type, i(id2) j(pf)
gen method=1 if type0==1
replace method=2 if type2==2
replace method=3 if type4==3
*set scheme
set scheme tab1
set scheme white_tableau

***FIGURE 2
graph dot (asis) f0 f1 f2 f3 f4 f5 f6 f7, over(method) by(id2)

*Immigrations
use "pm_tseries_migration2020_part.dta", clear
drop if issue!="immigration"
drop id
egen id=concat(country type)
reshape wide f type country, i(id) j(nr) 
gen id2=country0 if country0!=""
replace id2=country1 if country1!=""
replace id2=country7 if country7!=""
*reshape long type, i(id2) j(pf)
gen method=1 if type0==1
replace method=2 if type2==2
replace method=3 if type4==3
*set scheme
set scheme tab1
set scheme white_tableau

***FIGURE 3
graph dot (asis) f0 f1 f2 f3 f4 f5 f6 f7, over(method) by(id2)


***
clear
cd "/Users/admin/Dropbox/PM_alberto_oliver/projects/migration2020"
use "Germany_asgroups2020.dta", clear
append using "Spain_asgroups2020.dta"
append using "Switzerland_asgroups2020.dta"
append using "UK_asgroups2020.dta"

drop if daily<22067
drop if daily>22096

collapse (mean) Syria Irak Afghanistan Turkey Iran Syria_abs Irak_abs ///
Afghanistan_abs Turkey_abs Iran_abs Venezuela Colombia Honduras Nicaragua ///
ElSalvador Venezuela_abs Colombia_abs Honduras_abs Nicaragua_abs ElSalvador_abs ///
Eritrea Algeria Eritrea_abs Algeria_abs Pakistan Albania Pakistan_abs ///
Albania_abs, by(country)
rename Syria prob1
rename Syria_abs abs1
rename Irak prob2
rename Irak_abs abs2
rename Afghanistan prob3
rename Afghanistan_abs abs3
rename Turkey prob4
rename Turkey_abs abs4
rename Iran prob5
rename Iran_abs abs5
rename Venezuela prob6
rename Venezuela_abs abs6
rename Colombia prob7
rename Colombia_abs abs7
rename Honduras prob8
rename Honduras_abs abs8
rename Nicaragua prob9
rename Nicaragua_abs abs9
rename ElSalvador prob10
rename ElSalvador_abs abs10
rename Eritrea prob11
rename Eritrea_abs abs11
rename Algeria prob12
rename Algeria_abs abs12
rename Pakistan prob13
rename Pakistan_abs abs13
rename Albania prob14
rename Albania abs14
reshape long prob abs, i(country)

rename _j group
label define groupl 1 "Syria" 2 "Iraq" 3 "Afghanistan" 4 "Turkey" 5 "Iran" ///
6 "Venezuela" 7 "Colombia" 8 "Honduras" 9 "Nicaragua" 10 "El Salvador" ///
11 "Eritrea" 12 "Algeria" 13 "Pakistan" 14 "Albania" 
label values group groupl  

*Regression
encode country, gen(cntry)
reg prob abs i.cntry
est store m1
esttab m1

gen abs1000=abs/1000 if country=="Germany" | country=="Spain"
replace abs1000=abs/100 if country=="Switzerland" | country=="UK"

replace country="Great Britain" if country=="UK"

rename prob Probability
label variable Probability "Forecast: Probability of being largest in %"
label variable abs1000 "Absolute in 1000s"

***FIGURE 4
twoway (scatter Probability abs1000 if country=="Great Britain", ytitle("Probability in %") mlabel(group)) (scatter Probability abs1000 if country=="Switzerland", ytitle("Probability in %") mlabel(group)) (scatter Probability abs1000 if country=="Germany", ytitle("Probability in %") mlabel(group)) (scatter Probability abs1000 if country=="Spain", ytitle("Probability in %") mlabel(group)) (lfit Probability abs1000, ) (lfit Probability abs1000 if country=="Great Britain", ) (lfit Probability abs1000 if country=="Switzerland", ) (lfit Probability abs1000 if country=="Germany", ) (lfit Probability abs1000 if country=="Spain", )
twoway (scatter Probability abs1000, by(country) ytitle("Probability in %") mlabel(group)) (lfit Probability abs1000, by(country))


*Time series for Switzerland
***Comparison with result "Effektive Zuwanderung"
*https://www.sem.admin.ch/sem/de/home/publiservice/statistik/auslaenderstatistik/monitor.html
use "Switzerland_immigration2020", clear
drop if daily<22069

tsset daily
tsline p50 result2020, ytitle("Immigrants in 1000s")




